Necessary Packages

library(vegan)
library(ggplot2)
library(ggpubr)
library(pairwiseAdonis)
library(reshape2)
library(phyloseq)
library(userfriendlyscience)
library(microbiome)
library(themetagenomics)
library(RColorBrewer)
library(car)
library(betareg)
library(fitdistrplus)
library(emmeans)
library(multcompView)

NMDS Analysis - Comparing Beta Diversity between Samples

Individual File: CH_1_NMDS_Environment.Rmd

Combine all similar taxa by ‘Genus’

CH_1_ALL<-read.csv(file ="~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_seqtab_ALL_COMBINED.csv", header = TRUE, check.names = FALSE, row.names = 1)

CH_1_ALL<-as.matrix(CH_1_ALL)
CH_1_ALL_otu<-otu_table(CH_1_ALL, taxa_are_rows = TRUE)

CH_1_ALL_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_ALL_Metadata.csv", header=TRUE, check.names = FALSE, row.names = 1)

CH_1_ALL_metadata<-sample_data(CH_1_ALL_metadata)

CH_1_taxa<-read.csv(file ="~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Taxa_Graphs/CH_1_Clean_seq_REFERENCE.csv", header = TRUE, check.names = FALSE, row.names = 1)

CH_1_taxa<-as.matrix(CH_1_taxa)
CH_1_taxa_Phylo<-tax_table(CH_1_taxa)

CH_1_ALL_Phylo<-phyloseq(CH_1_ALL_otu,CH_1_taxa_Phylo,CH_1_ALL_metadata)

CH_1_ALL_melted<-tax_glom(CH_1_ALL_Phylo, "Genus")

write.csv(otu_table(CH_1_ALL_melted), "CH_1_seqtab_ALL_COMBINED.csv")

ALL (H2O + NP) NMDS:

CH_1_ALL_ASV<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_seqtab_ALL_COMBINED.csv", header = TRUE, check.names = FALSE, row.names = 1)

CH_1_ALL_ASV_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_ALL_Metadata.csv", header=TRUE, check.names = FALSE)

CH_1_ALL_t_ASV<-as.data.frame(t(CH_1_ALL_ASV))
CH_1_ALL_ASV_min_depth<-min(colSums(CH_1_ALL_ASV))
CH_1_ALL_t_ASV_rarefied<-as.data.frame(round(rrarefy(CH_1_ALL_t_ASV,10835)))
CH_1_ALL_ASV_dist = as.matrix(vegdist(CH_1_ALL_t_ASV_rarefied, "bray"))
CH_1_ALL_ASV_NMDS = metaMDS(CH_1_ALL_ASV_dist)
## Run 0 stress 0.106868 
## Run 1 stress 0.09902619 
## ... New best solution
## ... Procrustes: rmse 0.0545745  max resid 0.3373512 
## Run 2 stress 0.1101984 
## Run 3 stress 0.09893444 
## ... New best solution
## ... Procrustes: rmse 0.01040372  max resid 0.05190541 
## Run 4 stress 0.1013031 
## Run 5 stress 0.112083 
## Run 6 stress 0.09927236 
## ... Procrustes: rmse 0.010096  max resid 0.05008335 
## Run 7 stress 0.09914968 
## ... Procrustes: rmse 0.01664224  max resid 0.05253555 
## Run 8 stress 0.1111629 
## Run 9 stress 0.1014855 
## Run 10 stress 0.1067653 
## Run 11 stress 0.1071975 
## Run 12 stress 0.1014456 
## Run 13 stress 0.1013426 
## Run 14 stress 0.1063761 
## Run 15 stress 0.1118371 
## Run 16 stress 0.09940842 
## ... Procrustes: rmse 0.01363718  max resid 0.05126574 
## Run 17 stress 0.1099479 
## Run 18 stress 0.09881693 
## ... New best solution
## ... Procrustes: rmse 0.008617948  max resid 0.05246118 
## Run 19 stress 0.1070479 
## Run 20 stress 0.111242 
## *** No convergence -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
CH_1_ALL_ASV_MDS1 <-CH_1_ALL_ASV_NMDS$points[,1]
CH_1_ALL_ASV_MDS2<-CH_1_ALL_ASV_NMDS$points[,2]
CH_1_ALL_ASV_NMDS$stress
## [1] 0.09881693
CH_1_ALL_ASV_NMDS<-data.frame(MDS1=CH_1_ALL_ASV_MDS1, MDS2=CH_1_ALL_ASV_MDS2, Sample=CH_1_ALL_ASV_metadata$Sample, Location=CH_1_ALL_ASV_metadata$Location)

CH_1_ALL_ASV<-ggplot(CH_1_ALL_ASV_NMDS, aes(x = MDS1, y = MDS2, col = Location)) + geom_point() + stat_ellipse() + theme_classic() + scale_colour_manual(values = c("red", "black", "blue", "green")) + labs(title = "Cranberry vs. French Creek Microbial Communities", subtitle = "Northern Pike Gut Microbiota and Water Column Bacteria")

CH_1_ALL_ASV
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

pairwise.adonis(CH_1_ALL_ASV_dist, factors = CH_1_ALL_ASV_metadata$Location)
##                                 pairs    F.Model        R2 p.value
## 1   Cranberry H2O vs French Creek H2O   4.979361 0.1993390   0.003
## 2    Cranberry H2O vs French Creek NP 357.432404 0.9520553   0.001
## 3       Cranberry H2O vs Cranberry NP 359.549912 0.9374267   0.001
## 4 French Creek H2O vs French Creek NP 436.650859 0.9604092   0.001
## 5    French Creek H2O vs Cranberry NP 410.057267 0.9447078   0.001
## 6     French Creek NP vs Cranberry NP  11.620734 0.3456419   0.001
##   p.adjusted sig
## 1      0.018   .
## 2      0.006   *
## 3      0.006   *
## 4      0.006   *
## 5      0.006   *
## 6      0.006   *

H2O NMDS:

CH_1_H2O_ASV<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_seqs_taxa_H2O.csv", header = TRUE, check.names = FALSE, row.names = 1)

CH_1_H2O_ASV_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_H2O_metadata.csv", header=TRUE, check.names = FALSE)

CH_1_H2O_t_ASV<-as.data.frame(t(CH_1_H2O_ASV))
CH_1_H2O_ASV_min_depth<-min(colSums(CH_1_H2O_ASV))
CH_1_H2O_t_ASV_rarefied<-as.data.frame(round(rrarefy(CH_1_H2O_t_ASV, 10835)))
CH_1_H2O_ASV_dist = as.matrix(vegdist(CH_1_H2O_t_ASV_rarefied, "bray"))
CH_1_H2O_ASV_NMDS = metaMDS(CH_1_H2O_ASV_dist)
## Run 0 stress 0.1653655 
## Run 1 stress 0.1623115 
## ... New best solution
## ... Procrustes: rmse 0.1255831  max resid 0.4530973 
## Run 2 stress 0.1720984 
## Run 3 stress 0.220857 
## Run 4 stress 0.2321761 
## Run 5 stress 0.1623115 
## ... Procrustes: rmse 2.237287e-05  max resid 7.061574e-05 
## ... Similar to previous best
## Run 6 stress 0.1653655 
## Run 7 stress 0.2403046 
## Run 8 stress 0.2115946 
## Run 9 stress 0.264751 
## Run 10 stress 0.2295816 
## Run 11 stress 0.1653656 
## Run 12 stress 0.1805557 
## Run 13 stress 0.2391431 
## Run 14 stress 0.2195277 
## Run 15 stress 0.2215675 
## Run 16 stress 0.1623115 
## ... Procrustes: rmse 1.427786e-05  max resid 4.307807e-05 
## ... Similar to previous best
## Run 17 stress 0.1653656 
## Run 18 stress 0.1635876 
## Run 19 stress 0.1653662 
## Run 20 stress 0.1623115 
## ... Procrustes: rmse 1.184495e-05  max resid 3.938038e-05 
## ... Similar to previous best
## *** Solution reached
CH_1_H2O_ASV_MDS1 <-CH_1_H2O_ASV_NMDS$points[,1]
CH_1_H2O_ASV_MDS2<-CH_1_H2O_ASV_NMDS$points[,2]
CH_1_H2O_ASV_NMDS$stress
## [1] 0.1623115
CH_1_H2O_ASV_NMDS<-data.frame(MDS1=CH_1_H2O_ASV_MDS1, MDS2=CH_1_H2O_ASV_MDS2, Sample=CH_1_H2O_ASV_metadata$Sample, Location=CH_1_H2O_ASV_metadata$Location, Position = CH_1_H2O_ASV_metadata$Position)

CH_1_H2O_ASV<-ggplot(CH_1_H2O_ASV_NMDS, aes(x = MDS1, y = MDS2, col = Location)) + geom_point() + stat_ellipse() + theme_classic() + scale_colour_manual(values = c("red", "black")) + labs(title = "Water Column Bacteria", subtitle = "Cranberry Creek vs. French Creek")

CH_1_H2O_ASV

#Significance by Location
pairwise.adonis(CH_1_H2O_ASV_dist, factors = CH_1_H2O_ASV_metadata$Location)
##                               pairs  F.Model        R2 p.value p.adjusted
## 1 Cranberry H2O vs French Creek H2O 5.707285 0.2220104   0.001      0.001
##   sig
## 1  **
#Significance by Water Column Position
pairwise.adonis(CH_1_H2O_ASV_dist, factors = CH_1_H2O_ASV_metadata$Position)
##           pairs   F.Model        R2 p.value p.adjusted sig
## 1 TOP vs BOTTOM 0.9511421 0.0453981   0.398      0.398

NP NMDS:

CH_1_NP_ASV<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_seqs_taxa_NP.csv", header = TRUE, check.names = FALSE, row.names = 1)

CH_1_NP_ASV_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/NMDS/CH_1_NP_metadata.csv", header=TRUE, check.names = FALSE)

CH_1_NP_t_ASV<-as.data.frame(t(CH_1_NP_ASV))
CH_1_NP_ASV_min_depth<-min(colSums(CH_1_NP_ASV))
CH_1_NP_t_ASV_rarefied<-as.data.frame(round(rrarefy(CH_1_NP_t_ASV,CH_1_NP_ASV_min_depth)))
CH_1_NP_ASV_dist = as.matrix(vegdist(CH_1_NP_t_ASV_rarefied, "bray"))
CH_1_NP_ASV_NMDS = metaMDS(CH_1_NP_ASV_dist)
## Run 0 stress 0.1785157 
## Run 1 stress 0.2012448 
## Run 2 stress 0.1757351 
## ... New best solution
## ... Procrustes: rmse 0.09479133  max resid 0.2863075 
## Run 3 stress 0.1753254 
## ... New best solution
## ... Procrustes: rmse 0.1762159  max resid 0.4552027 
## Run 4 stress 0.1805747 
## Run 5 stress 0.3731032 
## Run 6 stress 0.1877325 
## Run 7 stress 0.1757365 
## ... Procrustes: rmse 0.1762787  max resid 0.4632109 
## Run 8 stress 0.2174771 
## Run 9 stress 0.1763051 
## Run 10 stress 0.1753209 
## ... New best solution
## ... Procrustes: rmse 0.0007094411  max resid 0.002501052 
## ... Similar to previous best
## Run 11 stress 0.1816692 
## Run 12 stress 0.1832908 
## Run 13 stress 0.1779428 
## Run 14 stress 0.1862091 
## Run 15 stress 0.1757334 
## ... Procrustes: rmse 0.1762785  max resid 0.4637288 
## Run 16 stress 0.1890442 
## Run 17 stress 0.1759089 
## Run 18 stress 0.1797656 
## Run 19 stress 0.1790026 
## Run 20 stress 0.1980181 
## *** Solution reached
CH_1_NP_ASV_MDS1 <-CH_1_NP_ASV_NMDS$points[,1]
CH_1_NP_ASV_MDS2<-CH_1_NP_ASV_NMDS$points[,2]
CH_1_NP_ASV_NMDS$stress
## [1] 0.1753209
CH_1_NP_ASV_NMDS<-data.frame(MDS1=CH_1_NP_ASV_MDS1, MDS2=CH_1_NP_ASV_MDS2, Sample=CH_1_NP_ASV_metadata$Sample, Location=CH_1_NP_ASV_metadata$Location, Size = CH_1_NP_ASV_metadata$Size)

CH_1_NP_ASV<-ggplot(CH_1_NP_ASV_NMDS, aes(x = MDS1, y = MDS2, col = Location)) + geom_point() + stat_ellipse() + theme_classic()+ scale_colour_manual(values = c("red", "black")) + labs(title = "Northern Pike Gut Microbiota", subtitle = "Cranberry Creek vs. French Creek")

CH_1_NP_ASV

#Significance by Location
pairwise.adonis(CH_1_NP_ASV_dist, factors = CH_1_NP_ASV_metadata$Location)
##                       pairs F.Model        R2 p.value p.adjusted sig
## 1 French Creek vs Cranberry 12.2461 0.3575911   0.001      0.001  **
#Significance by Fish Size
pairwise.adonis(CH_1_NP_ASV_dist, factors = CH_1_NP_ASV_metadata$Size)
##               pairs   F.Model         R2 p.value p.adjusted sig
## 1    61-70 vs 51-60 2.7164437 0.31164587   0.080       0.80    
## 2    61-70 vs 71-80 0.3452952 0.03694856   0.819       1.00    
## 3    61-70 vs 81-90 1.2542426 0.13553163   0.314       1.00    
## 4  61-70 vs 91-100+ 1.0390951 0.11495565   0.361       1.00    
## 5    51-60 vs 71-80 2.3495664 0.25130218   0.095       0.95    
## 6    51-60 vs 81-90 5.9256114 0.49688114   0.029       0.29    
## 7  51-60 vs 91-100+ 1.7298797 0.22379129   0.260       1.00    
## 8    71-80 vs 81-90 1.6601743 0.15573613   0.203       1.00    
## 9  71-80 vs 91-100+ 1.2471671 0.12170848   0.298       1.00    
## 10 81-90 vs 91-100+ 0.8860181 0.09970923   0.441       1.00

Rarefaction Analysis - Determining Sequencing Depth

All Samples

Individual File: CH_1_Rarefaction_Analysis.Rmd

Initial Analysis is completed with biom-format and mothur commands using bash commands. Please refer to biom_mothur_code.txt in pathway ~/Dropbox/bdgallo_MS_SUNYESF/NON_R_Scripts for information

The final product fom these commands is CH_1_seqtab_ALL_COMBINED.groups.rarefaction

CH_1_ALLSEQS_Rare<-read.table("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_Rarefaction_Alpha_Diversity/CH_1_seqtab_ALL_COMBINED.groups.rarefaction", header = TRUE)
CH_1_ALLSEQS_Rare_DataColumnsPrimary<-CH_1_ALLSEQS_Rare[grepl("use", names(CH_1_ALLSEQS_Rare))]
CH_1_ALLSEQS_Rare_numSampled<-CH_1_ALLSEQS_Rare[, c("numsampled")]
CH_1_ALLSEQS_Rare<-cbind (CH_1_ALLSEQS_Rare_numSampled, CH_1_ALLSEQS_Rare_DataColumnsPrimary)
CH_1_ALLSEQS_Rare_long<-melt(CH_1_ALLSEQS_Rare, id.vars="CH_1_ALLSEQS_Rare_numSampled")
write.csv(CH_1_ALLSEQS_Rare_long, "CH_1_ALLSEQS_Rare_long.csv")

###Export .csv file and manually input sample locations using metadata###

CH_1_ALLSEQS_Rare_long<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_Rarefaction_Alpha_Diversity/CH_1_ALLSEQS_Rare_long.csv", header = TRUE)

options (scipen = 10000000)
CH_1_ALLSEQS_rarefaction<-ggplot(data=CH_1_ALLSEQS_Rare_long, aes(x = CH_1_ALLSEQS_Rare_numSampled, y = value, color = Location.Type)) + geom_line(aes(group=variable), size=1.5) + xlab("Number of Sequences") + ylab("ASVs (100%)") + labs(title = "Rarefaction Curves", subtitle = "Cranberry Creek and French Creek H2O and YOY Northern Pike") + theme_classic()+ scale_color_manual(values = c("black", "red", "blue", "green"))

CH_1_ALLSEQS_rarefaction
## Warning: Removed 13816 rows containing missing values (geom_path).

###Using CH_1_ALLSEQS_Rare_long.csv file, split up H2O and NP Samples into two separate .csv files###

CH_1_H2O_Rare_long<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_Rarefaction_Alpha_Diversity/CH_1_H2O_long.csv", header = TRUE)
CH_1_NP_Rare_long<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/CH_1_Rarefaction_Alpha_Diversity/CH_1_NP_long.csv", header = TRUE)

CH_1_H2O_rarefaction<-ggplot(data=CH_1_H2O_Rare_long, aes(x = CH_1_ALLSEQS_Rare_numSampled, y = value, color = Location.Type)) + geom_line(aes(group=variable), size=1.5) + scale_color_manual(values = c("blue", "green", "pink")) + xlab("Number of Sequences") + ylab("ASVs (100%)") + labs(title = "Rarefaction Curves", subtitle = "Cranberry Creek and French Creek H2O") + theme_classic()

CH_1_H2O_rarefaction
## Warning: Removed 7731 rows containing missing values (geom_path).

CH_1_NP_rarefaction<-ggplot(data=CH_1_NP_Rare_long, aes(x = CH_1_ALLSEQS_Rare_numSampled, y = value, color = Location.Type)) + geom_line(aes(group=variable), size=1.5) + scale_color_manual(values = c("black", "red", "brown")) + xlab("Number of Sequences") + ylab("ASVs (100%)") + labs(title = "Rarefaction Curves", subtitle = "Cranberry Creek and French Creek Northern Pike") + theme_classic()

CH_1_NP_rarefaction
## Warning: Removed 6085 rows containing missing values (geom_path).

Taxa Analysis - Determining Top 20 Taxa for Treatments

All Samples

Individual File: CH_1_taxa_graphs.Rmd

Note that taxa excel sheet was cleaned manually by removing all NA calls in Kingdom and Phylum columns. Additionally, any Kingdom tracing to Eukaryotes/Archaea were also removed

Normalized Top 10 Taxa Bar Graphs:

#'CH_1_ALL_melted' phyloseq object carried over from Chunk #2 above

CH_1_ALL_t_ASV_rarefied_t<-(t(CH_1_ALL_t_ASV_rarefied)) #Normalized ALL otu table

CH_1_ALL_Norm<-as.matrix(CH_1_ALL_t_ASV_rarefied_t)
CH_1_ALL_otu_Norm<-otu_table(CH_1_ALL_Norm, taxa_are_rows = TRUE)

CH_1_ALL_Phylo_Norm<-phyloseq(CH_1_ALL_otu_Norm,CH_1_taxa_Phylo,CH_1_ALL_metadata)
CH_1_ALL_melted_Norm<-tax_glom(CH_1_ALL_Phylo_Norm, "Genus")

CH_1_ALL_melted_otu<-otu_table(CH_1_ALL_melted_Norm)
CH_1_taxa_top10<-names(sort(taxa_sums(CH_1_ALL_melted_Norm), decreasing = TRUE)) [1:10]
CH_1_ALL_Phylo.top10<-transform_sample_counts(CH_1_ALL_melted_Norm, function(CH_1_ALL_melted_otu) CH_1_ALL_melted_otu/sum(CH_1_ALL_melted_otu))
CH_1_ALL_Phylo.top10<-prune_taxa(CH_1_taxa_top10, CH_1_ALL_Phylo.top10)

sample_data(CH_1_ALL_Phylo.top10)$Location<-factor(sample_data(CH_1_ALL_Phylo.top10)$Location, levels = c("Cranberry H2O", "French Creek H2O", "Cranberry NP", "French Creek NP"))

CH_1_Top10<-plot_bar(CH_1_ALL_Phylo.top10, x = "Location", fill = "Genus") + scale_fill_brewer(palette = "Paired") + theme_classic()
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
CH_1_Top10 + geom_bar(aes(fill=Genus), stat="identity", position = "stack") +  scale_fill_brewer(palette = "Paired")
## Scale for 'fill' is already present. Adding another scale for 'fill',
## which will replace the existing scale.

write.csv(otu_table(CH_1_ALL_Phylo.top10), "CH_1_Top10_Taxa_Percent.csv")
#Offload Top20 ASV data - calculate average for each group so that y-axis = 0-1 (e.g. 0 - 100%). Also calculate SE and reupload data.


#ENV

ZOOP_Palette<-function (n) 
{
    x <- ramp(seq.int(0, 1, length.out = n))
    if (ncol(x) == 4L) 
        rgb(x[, 1L], x[, 2L], x[, 3L], x[, 4L], maxColorValue = 255)
    else rgb(x[, 1L], x[, 2L], x[, 3L], maxColorValue = 255)
}

CH_1_Top10_Taxa_Percent_ENV<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Taxa_Graphs/CH_1_Top10_Taxa_Percent_ENV.csv", header=TRUE, check.names = FALSE)

CH_1_Top10_Taxa_Percent_ENV$Location<-factor(CH_1_Top10_Taxa_Percent_ENV$Location, levels = c("Cranberry/H2O",  "French Creek/H2O"))

colourCount<-length(unique(CH_1_Top10_Taxa_Percent_ENV$Genus))
getPalette<-colorRampPalette(brewer.pal(8, "Set1"))

CH_1_Top10_Taxa_Percent_ENV_Graph<-ggplot(CH_1_Top10_Taxa_Percent_ENV, aes(x=Location, y = Average, fill = Genus)) + geom_bar(colour = "black", stat = "identity", position = position_dodge()) + theme_classic() + scale_fill_manual(values = getPalette(colourCount)) + labs(title = "Larval Northern Pike Gut Microbiota", subtitle = "Top 10 Bacterial Genera Wetland Water Samples", x = "", y = "Relative ASV Abundance (%)")+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + geom_errorbar(aes(ymin=Average-SE, ymax = Average+SE), width=.2, position=position_dodge(.9))

CH_1_Top10_Taxa_Percent_ENV_Graph

#FISH

CH_1_Top10_Taxa_Percent_FISH<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Taxa_Graphs/CH_1_Top10_Taxa_Percent_FISH.csv", header=TRUE, check.names = FALSE)

CH_1_Top10_Taxa_Percent_FISH$Location<-factor(CH_1_Top10_Taxa_Percent_FISH$Location, levels = c("Cranberry/NP",  "French Creek/NP"))

colourCount<-length(unique(CH_1_Top10_Taxa_Percent_FISH$Genus))
getPalette<-colorRampPalette(brewer.pal(8, "Set1"))

CH_1_Top10_Taxa_Percent_FISH_Graph<-ggplot(CH_1_Top10_Taxa_Percent_FISH, aes(x=Location, y = Average, fill = Genus)) + geom_bar(colour = "black", stat = "identity", position = position_dodge()) + theme_classic() + scale_fill_manual(values = getPalette(colourCount)) + labs(title = "Larval Northern Pike Gut Microbiota", subtitle = "Top 10 Bacterial Genera in Northern Pike", x = "", y = "")+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + geom_errorbar(aes(ymin=Average-SE, ymax = Average+SE), width=.2, position=position_dodge(.9))

CH_1_Top10_Taxa_Percent_FISH_Graph

#COMBINED

ggarrange(CH_1_Top10_Taxa_Percent_ENV_Graph, CH_1_Top10_Taxa_Percent_FISH_Graph, labels = c("A", "B"), common.legend = TRUE, legend = "right")

Heatmap Clustering (Top 20 Taxa - Normalized):

#NMDS Heatmap
plot_heatmap(CH_1_ALL_Phylo.top10, sample.label = "Location",  "NMDS", "bray", "Genus")
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning: Transformation introduced infinite values in discrete y-axis

Alpha Diversity Analysis

Observed, Chao1, Shannon

Individual File: CH_2_Alpha_Diversity.Rmd

###Phyloseq Object from CH.1 taxa analysis carried over###

CH_1_ALL_Phylo_Alpha<-prune_taxa(taxa_sums(CH_1_ALL_melted_Norm) > 0, CH_1_ALL_melted_Norm)

plot_richness(CH_1_ALL_Phylo_Alpha, x = "Sample", measures = c("Observed", "Chao1","Shannon")) + theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 92 rows containing missing values (geom_errorbar).

plot_richness(CH_1_ALL_Phylo_Alpha, x = "Location", measures = c("Observed", "Chao1", "Shannon")) + theme_classic() + theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 92 rows containing missing values (geom_errorbar).

CH_1_ALL_Alpha<-estimate_richness(CH_1_ALL_Phylo, split = TRUE, measures = c("Observed", "Chao1", "Shannon"))
CH_1_ALL_Alpha$Location<-CH_1_ALL_ASV_NMDS$Location
head(CH_1_ALL_Alpha)
##                                  Observed     Chao1  se.chao1  Shannon
## H2O.041.513.718.1F_filt.fastq.gz       82  82.12500 0.4435997 2.588605
## H2O.042.515.718.1F_filt.fastq.gz      143 143.90909 1.3466632 3.268961
## H2O.043.516.718.1F_filt.fastq.gz      113 119.87500 5.5654232 2.627143
## H2O.044.517.718.1F_filt.fastq.gz      144 146.33333 2.5533155 2.510726
## H2O.045.518.719.1F_filt.fastq.gz      127 127.10000 0.3818439 2.544150
## H2O.046.520.719.1F_filt.fastq.gz       95  96.15385 1.5235758 2.707144
##                                       Location
## H2O.041.513.718.1F_filt.fastq.gz Cranberry H2O
## H2O.042.515.718.1F_filt.fastq.gz Cranberry H2O
## H2O.043.516.718.1F_filt.fastq.gz Cranberry H2O
## H2O.044.517.718.1F_filt.fastq.gz Cranberry H2O
## H2O.045.518.719.1F_filt.fastq.gz Cranberry H2O
## H2O.046.520.719.1F_filt.fastq.gz Cranberry H2O
CH_1_ALL_Obs_PH<-posthocTGH(y = CH_1_ALL_Alpha$Observed, x = CH_1_ALL_Alpha$Location, method = "games-howell")
CH_1_ALL_Obs_PH
##                   n means variances
## Cranberry H2O    11   118      1052
## Cranberry NP     15    12        43
## French Creek H2O 11   116      1193
## French Creek NP   9    17        41
## 
##                                    diff  ci.lo ci.hi     t df    p
## Cranberry NP-Cranberry H2O       -105.5 -135.6   -75 10.63 11 <.01
## French Creek H2O-Cranberry H2O     -2.0  -42.0    38  0.14 20    1
## French Creek NP-Cranberry H2O    -101.1 -131.3   -71 10.10 11 <.01
## French Creek H2O-Cranberry NP     103.5   71.5   136  9.81 11 <.01
## French Creek NP-Cranberry NP        4.4   -3.3    12  1.62 17  .39
## French Creek NP-French Creek H2O  -99.1 -131.2   -67  9.32 11 <.01
CH_1_ALL_Chao1_PH<-posthocTGH(y = CH_1_ALL_Alpha$Chao1, x = CH_1_ALL_Alpha$Location, method = "games-howell")
CH_1_ALL_Chao1_PH
##                   n means variances
## Cranberry H2O    11   120      1029
## Cranberry NP     15    12        43
## French Creek H2O 11   119      1128
## French Creek NP   9    17        41
## 
##                                     diff  ci.lo ci.hi      t df    p
## Cranberry NP-Cranberry H2O       -107.06 -136.8   -77 10.900 11 <.01
## French Creek H2O-Cranberry H2O     -0.64  -39.8    39  0.045 20    1
## French Creek NP-Cranberry H2O    -102.64 -132.5   -73 10.360 11 <.01
## French Creek H2O-Cranberry NP     106.42   75.3   138 10.364 11 <.01
## French Creek NP-Cranberry NP        4.42   -3.3    12  1.619 17  .39
## French Creek NP-French Creek H2O -102.00 -133.2   -71  9.855 11 <.01
CH_1_ALL_Shannon_PH<-posthocTGH(y = CH_1_ALL_Alpha$Shannon, x = CH_1_ALL_Alpha$Location, method = "games-howell")
CH_1_ALL_Shannon_PH
##                   n means variances
## Cranberry H2O    11   2.7      0.12
## Cranberry NP     15   1.0      0.22
## French Creek H2O 11   2.8      0.11
## French Creek NP   9   1.1      0.29
## 
##                                    diff ci.lo ci.hi     t df    p
## Cranberry NP-Cranberry H2O       -1.681 -2.12 -1.24 10.48 24 <.01
## French Creek H2O-Cranberry H2O    0.079 -0.32  0.48  0.55 20  .94
## French Creek NP-Cranberry H2O    -1.612 -2.22 -1.01  7.80 13 <.01
## French Creek H2O-Cranberry NP     1.760  1.32  2.20 11.15 24 <.01
## French Creek NP-Cranberry NP      0.068 -0.55  0.69  0.32 15  .99
## French Creek NP-French Creek H2O -1.692 -2.29 -1.09  8.27 13 <.01

Core Microbiome Analysis

Microbes with >50% coverage and >1% total microbiota per sample

Individual File: CH_1_Core_Microbiome.Rmd

Cran NP (Normalized):

CH_1_CranNP<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_CranNP.csv", header = TRUE, check.names = FALSE)
CH_1_CranNP.names<-make.names(CH_1_CranNP$Genus, unique = TRUE)
rownames(CH_1_CranNP)<-CH_1_CranNP.names
CH_1_CranNP$Genus<-NULL

CH_1_CranNP_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_CranNP_metadata.csv", header=TRUE, check.names = FALSE, row.names = 1)

CH_1_CranNP_metadata_Phylo<-sample_data(CH_1_CranNP_metadata)

CH_1_CranNP_Phylo<-as.matrix(CH_1_CranNP)
CH_1_CranNP_Phylo<-otu_table(CH_1_CranNP_Phylo, taxa_are_rows = TRUE)
CH_1_CranNP_Phylo<-phyloseq(CH_1_CranNP_Phylo, CH_1_CranNP_metadata_Phylo)
CH_1_CranNP_rel<-transform(CH_1_CranNP_Phylo, "compositional")
CH_1_CranNP_Prevalence<-prevalence(CH_1_CranNP_rel, detection = 1/100, sort = TRUE)
CH_1_CranNP_Prevalence[1:50]
##                Cetobacterium                  Plesiomonas 
##                   0.86666667                   0.86666667 
##                    Aeromonas  Clostridium_sensu_stricto_1 
##                   0.73333333                   0.66666667 
##              Paraclostridium                 Epulopiscium 
##                   0.33333333                   0.26666667 
##             Terrisporobacter Clostridium_sensu_stricto_13 
##                   0.26666667                   0.20000000 
##           Macellibacteroides                   Romboutsia 
##                   0.13333333                   0.13333333 
##                 Turicibacter                    Neisseria 
##                   0.06666667                   0.06666667 
##               Porphyrobacter             Rubellimicrobium 
##                   0.06666667                   0.06666667 
##              Chromobacterium                Luteolibacter 
##                   0.06666667                   0.06666667 
##             Polynucleobacter                       Vibrio 
##                   0.06666667                   0.06666667 
##                Limnohabitans                    SBZC.1223 
##                   0.06666667                   0.00000000 
##                       GOUTB8                   Macromonas 
##                   0.00000000                   0.00000000 
##                    Aquincola                  Nannocystis 
##                   0.00000000                   0.00000000 
##               Rhizomicrobium                 Anaerofustis 
##                   0.00000000                   0.00000000 
##            Dendrosporobacter                  Rhodocyclus 
##                   0.00000000                   0.00000000 
##                 Methylovorus       Candidatus_Paceibacter 
##                   0.00000000                   0.00000000 
##              Quatrionicoccus               Mongoliicoccus 
##                   0.00000000                   0.00000000 
##         Candidatus_Flaviluna                Fimbriiglobus 
##                   0.00000000                   0.00000000 
##             Anabaena_BECID22                  Alpinimonas 
##                   0.00000000                   0.00000000 
##                   Minicystis                  Cereibacter 
##                   0.00000000                   0.00000000 
##         Candidatus_Limnoluna                 Spongiimonas 
##                   0.00000000                   0.00000000 
##                 Spongiivirga                 Salinirepens 
##                   0.00000000                   0.00000000 
##                   Kinneretia                    Mycoplana 
##                   0.00000000                   0.00000000 
##          Ruminiclostridium_1               Asaccharospora 
##                   0.00000000                   0.00000000 
##                   Nitrospira      Synechococcus_MBIC10613 
##                   0.00000000                   0.00000000 
##               Microbacterium                   Rickettsia 
##                   0.00000000                   0.00000000

FC Low NP (Normalized):

CH_1_FCLOW_NP<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_FCLOW_NP.csv", header = TRUE, check.names = FALSE)
CH_1_FCLOW_NP.names<-make.names(CH_1_FCLOW_NP$Genus, unique = TRUE)
rownames(CH_1_FCLOW_NP)<-CH_1_FCLOW_NP.names
CH_1_FCLOW_NP$Genus<-NULL
CH_1_FCLOW_NP$Genus<-NULL

CH_1_FC_LOW_NP_metadata<-read.csv("~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_FCLOW_NP_metadata.csv", header=TRUE, check.names = FALSE, row.names = 1)

CH_1_FC_LOW_NP_metadata_Phylo<-sample_data(CH_1_FC_LOW_NP_metadata)

CH_1_FCLOW_NP_Phylo<-as.matrix(CH_1_FCLOW_NP)
CH_1_FCLOW_NP_Phylo<-otu_table(CH_1_FCLOW_NP_Phylo, taxa_are_rows = TRUE)
CH_1_FCLOW_NP_Phylo<-phyloseq(CH_1_FCLOW_NP_Phylo, CH_1_FC_LOW_NP_metadata_Phylo)
CH_1_FCLOW_NP_rel<-transform(CH_1_FCLOW_NP_Phylo, "compositional")
CH_1_FCLOW_NP_Prevalence<-prevalence(CH_1_FCLOW_NP_rel, detection = 1/100, sort = TRUE)
CH_1_FCLOW_NP_Prevalence[1:50]
##  Clostridium_sensu_stricto_1                  Plesiomonas 
##                    0.8888889                    0.7777778 
##                    Aeromonas                   Romboutsia 
##                    0.6666667                    0.4444444 
##             Terrisporobacter              Paraclostridium 
##                    0.4444444                    0.4444444 
##                 Epulopiscium                    Brevinema 
##                    0.3333333                    0.3333333 
## Clostridium_sensu_stricto_13             Cellulosilyticum 
##                    0.2222222                    0.1111111 
##                 Hymenobacter                    Neisseria 
##                    0.1111111                    0.1111111 
##                Silvanigrella                    Mailhella 
##                    0.1111111                    0.1111111 
##                Cetobacterium                   Mycoplasma 
##                    0.1111111                    0.1111111 
##                    SBZC.1223                       GOUTB8 
##                    0.0000000                    0.0000000 
##                   Macromonas                    Aquincola 
##                    0.0000000                    0.0000000 
##                  Nannocystis               Rhizomicrobium 
##                    0.0000000                    0.0000000 
##                 Anaerofustis            Dendrosporobacter 
##                    0.0000000                    0.0000000 
##                  Rhodocyclus                 Methylovorus 
##                    0.0000000                    0.0000000 
##       Candidatus_Paceibacter              Quatrionicoccus 
##                    0.0000000                    0.0000000 
##               Mongoliicoccus         Candidatus_Flaviluna 
##                    0.0000000                    0.0000000 
##                Fimbriiglobus             Anabaena_BECID22 
##                    0.0000000                    0.0000000 
##                  Alpinimonas                   Minicystis 
##                    0.0000000                    0.0000000 
##                  Cereibacter         Candidatus_Limnoluna 
##                    0.0000000                    0.0000000 
##                 Spongiimonas                 Spongiivirga 
##                    0.0000000                    0.0000000 
##                 Salinirepens                   Kinneretia 
##                    0.0000000                    0.0000000 
##                    Mycoplana          Ruminiclostridium_1 
##                    0.0000000                    0.0000000 
##               Asaccharospora                   Nitrospira 
##                    0.0000000                    0.0000000 
##      Synechococcus_MBIC10613               Microbacterium 
##                    0.0000000                    0.0000000 
##                   Rickettsia                        AAP99 
##                    0.0000000                    0.0000000 
##                   Fonticella                Parachlamydia 
##                    0.0000000                    0.0000000

CRAN(Graph):

CRAN_H2O_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_CRAN_H2O_Log2.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : incomplete final line found by readTableHeader on '~/Dropbox/
## bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/
## CH_1_CRAN_H2O_Log2.csv'
CRAN_NP_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_Cran_NP_Log2.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : incomplete final line found by readTableHeader on '~/Dropbox/
## bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/
## CH_1_Cran_NP_Log2.csv'
CRAN_H2O_Log2_Graph<-ggplot(data = CRAN_H2O_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + ylim(0, 0.6) + labs(title = "Environmental Samples", subtitle = "Cranberry Creek", y = "ASV Abundance(%)", x = "") + theme_classic()

CRAN_NP_Log2_Graph<-ggplot(data = CRAN_NP_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + ylim(0, 60) + labs(title = "Core Gut Microbiome Selection in Larval Pike", subtitle = "Cranberry Creek", y = "", x = "") + theme_classic()

CRAN_Log2_FINAL<-ggarrange(CRAN_H2O_Log2_Graph, CRAN_NP_Log2_Graph, labels = c("A", "B"), common.legend = TRUE, legend = "right")
CRAN_Log2_FINAL

FC Low (Graph):

FCLOW_H2O_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_FCLOW_H2O_Log2.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : incomplete final line found by readTableHeader on '~/Dropbox/
## bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/
## CH_1_FCLOW_H2O_Log2.csv'
FCLOW_NP_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_FCLOW_NP_Log2.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : incomplete final line found by readTableHeader on '~/Dropbox/
## bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/
## CH_1_FCLOW_NP_Log2.csv'
FCLOW_H2O_Log2_Graph<-ggplot(data = FCLOW_H2O_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + ylim(0, 0.5) + labs(title = "Environmental Samples", subtitle = "Lower French Creek", y = "ASV Abundance(%)", x = "") + theme_classic()

FCLOW_NP_Log2_Graph<-ggplot(data = FCLOW_NP_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + ylim(0, 50) + labs(title = "Core Gut Microbiome Selection in Larval Pike", subtitle = "Lower French Creek", y = "", x = "") + theme_classic()

FCLOW_Log2_FINAL<-ggarrange(FCLOW_H2O_Log2_Graph, FCLOW_NP_Log2_Graph, labels = c("A", "B"), common.legend = TRUE, legend = "right")
FCLOW_Log2_FINAL

Comparing between Core Gut Microbiotas (Environments + NP) Graphs:

CH_1_ALL_ENV_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_ENV_COMPARE_ALL_MERGE_Log2.csv", header = TRUE, check.names = FALSE)

CH_1_ALL_ENV_Log2_Graph<-ggplot(data = CH_1_ALL_ENV_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + ylim(0, 0.2) + labs(title = "Environmental Samples", subtitle = "All Sites", y = "ASV Abundance(%)", x = "") + theme_classic()

CH_1_ALL_NP_Log2<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_NP_ALL_MERGE_Log2.csv", header = TRUE, check.names = FALSE)

CH_1_ALL_NP_Log2_Graph<-ggplot(data = CH_1_ALL_NP_Log2, aes(x=Location, y = AVG, fill = Genus)) + geom_bar(stat="identity", color="black", position = position_dodge()) + geom_errorbar(aes(ymin=AVG-SE, ymax = AVG+SE), width=.2, position=position_dodge(.9)) + labs(title = "Core Gut Microbiome Selection in Juvenile Pike", subtitle = "All Sites", x = "", y = "") + theme_classic()+ geom_text(aes(label = "Core", x = 0.66, y = 22), color = "black", size = 4)+ geom_text(aes(label = "Core", x = 0.89, y = 51), color = "black", size = 4)+ geom_text(aes(label = "Core", x = 1.12, y = 10), color = "black", size = 4) + geom_text(aes(label = "Core", x = 1.34, y = 30), color = "black", size = 4)+ geom_text(aes(label = "Core", x = 1.66, y = 18), color = "black", size = 4)+ geom_text(aes(label = "Core", x = 2.11, y = 37), color = "black", size = 4)+ geom_text(aes(label = "Core", x = 2.34, y = 11), color = "black", size = 4)

Core_FINAL<-ggarrange(CH_1_ALL_ENV_Log2_Graph, CH_1_ALL_NP_Log2_Graph, labels = c("A", "B"), common.legend = TRUE, legend = "right")

Core_FINAL

Determining significant effects on Core microbiota abundances between locations:

#All NP samples were grouped in one file with core microbiota genera and location of samples. Absolute abundance was determined by taking read count and dividing by 10,835 (Normalized depth)

CH_1_NP_compare<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_NP_Core_Compare.csv", header=TRUE, check.names = FALSE)

hist(CH_1_NP_compare$Abundance)

CH_1_Core_Abundance<-CH_1_NP_compare$Abundance
CH_1_Core_Abundance__scaled<-(CH_1_Core_Abundance-min(CH_1_Core_Abundance) + 0.000001/max(CH_1_Core_Abundance) + 0.000002)
CH_1_Core_fitbeta<-fitdist(CH_1_Core_Abundance__scaled, "beta")
CH_1_Core_fitbeta
## Fitting of the distribution ' beta ' by maximum likelihood 
## Parameters:
##         estimate Std. Error
## shape1 0.2921962  0.0335308
## shape2 1.4286120  0.2501722
plot(CH_1_Core_fitbeta)

cdfcomp(CH_1_Core_fitbeta, legendtext = "beta")

CH_1_core_beta_dist<-betareg(CH_1_Core_Abundance__scaled ~ Location*Genus, data=CH_1_NP_compare)
summary(CH_1_core_beta_dist)
## 
## Call:
## betareg(formula = CH_1_Core_Abundance__scaled ~ Location * Genus, 
##     data = CH_1_NP_compare)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -3.8515 -0.4506  0.2163  0.6563  1.8786 
## 
## Coefficients (mean model with logit link):
##                                                          Estimate
## (Intercept)                                              -1.75005
## LocationFrench Creek/NP                                  -0.03124
## GenusCetobacterium                                        1.11021
## GenusClostridium_sensu_stricto_1                         -0.31046
## GenusPlesiomonas                                          0.25243
## LocationFrench Creek/NP:GenusCetobacterium               -2.17527
## LocationFrench Creek/NP:GenusClostridium_sensu_stricto_1  1.02440
## LocationFrench Creek/NP:GenusPlesiomonas                 -0.54156
##                                                          Std. Error
## (Intercept)                                                 0.29483
## LocationFrench Creek/NP                                     0.45058
## GenusCetobacterium                                          0.39425
## GenusClostridium_sensu_stricto_1                            0.38948
## GenusPlesiomonas                                            0.39090
## LocationFrench Creek/NP:GenusCetobacterium                  0.63875
## LocationFrench Creek/NP:GenusClostridium_sensu_stricto_1    0.63872
## LocationFrench Creek/NP:GenusPlesiomonas                    0.63689
##                                                          z value
## (Intercept)                                               -5.936
## LocationFrench Creek/NP                                   -0.069
## GenusCetobacterium                                         2.816
## GenusClostridium_sensu_stricto_1                          -0.797
## GenusPlesiomonas                                           0.646
## LocationFrench Creek/NP:GenusCetobacterium                -3.406
## LocationFrench Creek/NP:GenusClostridium_sensu_stricto_1   1.604
## LocationFrench Creek/NP:GenusPlesiomonas                  -0.850
##                                                               Pr(>|z|)    
## (Intercept)                                              0.00000000292 ***
## LocationFrench Creek/NP                                        0.94472    
## GenusCetobacterium                                             0.00486 ** 
## GenusClostridium_sensu_stricto_1                               0.42539    
## GenusPlesiomonas                                               0.51843    
## LocationFrench Creek/NP:GenusCetobacterium                     0.00066 ***
## LocationFrench Creek/NP:GenusClostridium_sensu_stricto_1       0.10875    
## LocationFrench Creek/NP:GenusPlesiomonas                       0.39515    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value       Pr(>|z|)    
## (phi)   2.3259     0.3626   6.414 0.000000000142 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 158.8 on 9 Df
## Pseudo R-squared: 0.286
## Number of iterations: 34 (BFGS) + 1 (Fisher scoring)
Anova(CH_1_core_beta_dist, type = 3)
## Analysis of Deviance Table (Type III tests)
## 
## Response: CH_1_Core_Abundance__scaled
##                Df   Chisq     Pr(>Chisq)    
## (Intercept)     1 35.2338 0.000000002924 ***
## Location        1  0.0048       0.944717    
## Genus           3 14.1875       0.002661 ** 
## Location:Genus  3 25.5518 0.000011836917 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Proc_core_marginal<-emmeans(CH_1_core_beta_dist, ~ Location*Genus)
pairs(Proc_core_marginal, adjust = "tukey")
##  contrast                                                                              
##  Cranberry/NP,Aeromonas - French Creek/NP,Aeromonas                                    
##  Cranberry/NP,Aeromonas - Cranberry/NP,Cetobacterium                                   
##  Cranberry/NP,Aeromonas - French Creek/NP,Cetobacterium                                
##  Cranberry/NP,Aeromonas - Cranberry/NP,Clostridium_sensu_stricto_1                     
##  Cranberry/NP,Aeromonas - French Creek/NP,Clostridium_sensu_stricto_1                  
##  Cranberry/NP,Aeromonas - Cranberry/NP,Plesiomonas                                     
##  Cranberry/NP,Aeromonas - French Creek/NP,Plesiomonas                                  
##  French Creek/NP,Aeromonas - Cranberry/NP,Cetobacterium                                
##  French Creek/NP,Aeromonas - French Creek/NP,Cetobacterium                             
##  French Creek/NP,Aeromonas - Cranberry/NP,Clostridium_sensu_stricto_1                  
##  French Creek/NP,Aeromonas - French Creek/NP,Clostridium_sensu_stricto_1               
##  French Creek/NP,Aeromonas - Cranberry/NP,Plesiomonas                                  
##  French Creek/NP,Aeromonas - French Creek/NP,Plesiomonas                               
##  Cranberry/NP,Cetobacterium - French Creek/NP,Cetobacterium                            
##  Cranberry/NP,Cetobacterium - Cranberry/NP,Clostridium_sensu_stricto_1                 
##  Cranberry/NP,Cetobacterium - French Creek/NP,Clostridium_sensu_stricto_1              
##  Cranberry/NP,Cetobacterium - Cranberry/NP,Plesiomonas                                 
##  Cranberry/NP,Cetobacterium - French Creek/NP,Plesiomonas                              
##  French Creek/NP,Cetobacterium - Cranberry/NP,Clostridium_sensu_stricto_1              
##  French Creek/NP,Cetobacterium - French Creek/NP,Clostridium_sensu_stricto_1           
##  French Creek/NP,Cetobacterium - Cranberry/NP,Plesiomonas                              
##  French Creek/NP,Cetobacterium - French Creek/NP,Plesiomonas                           
##  Cranberry/NP,Clostridium_sensu_stricto_1 - French Creek/NP,Clostridium_sensu_stricto_1
##  Cranberry/NP,Clostridium_sensu_stricto_1 - Cranberry/NP,Plesiomonas                   
##  Cranberry/NP,Clostridium_sensu_stricto_1 - French Creek/NP,Plesiomonas                
##  French Creek/NP,Clostridium_sensu_stricto_1 - Cranberry/NP,Plesiomonas                
##  French Creek/NP,Clostridium_sensu_stricto_1 - French Creek/NP,Plesiomonas             
##  Cranberry/NP,Plesiomonas - French Creek/NP,Plesiomonas                                
##     estimate         SE  df z.ratio p.value
##   0.00389751 0.05605564 Inf   0.070  1.0000
##  -0.19724240 0.07123953 Inf  -2.769  0.1029
##   0.09317049 0.03963718 Inf   2.351  0.2662
##   0.03504597 0.04439228 Inf   0.789  0.9937
##  -0.10786480 0.07627127 Inf  -1.414  0.8511
##  -0.03473991 0.05402269 Inf  -0.643  0.9983
##   0.03603587 0.04955534 Inf   0.727  0.9962
##  -0.20113991 0.07611213 Inf  -2.643  0.1408
##   0.08927298 0.04783958 Inf   1.866  0.5744
##   0.03114846 0.05185763 Inf   0.601  0.9989
##  -0.11176232 0.08084614 Inf  -1.382  0.8656
##  -0.03863742 0.06031115 Inf  -0.641  0.9983
##   0.03213835 0.05634056 Inf   0.570  0.9992
##   0.29041289 0.06462969 Inf   4.493  0.0002
##   0.23228837 0.06790828 Inf   3.421  0.0145
##   0.08937760 0.09201547 Inf   0.971  0.9784
##   0.16250249 0.07466301 Inf   2.176  0.3660
##   0.23327827 0.07138754 Inf   3.268  0.0241
##  -0.05812452 0.03319247 Inf  -1.751  0.6533
##  -0.20103529 0.07043951 Inf  -2.854  0.0822
##  -0.12791040 0.04553002 Inf  -2.809  0.0926
##  -0.05713462 0.03982670 Inf  -1.435  0.8413
##  -0.14291077 0.07323810 Inf  -1.951  0.5153
##  -0.06978588 0.04967248 Inf  -1.405  0.8554
##   0.00098990 0.04470948 Inf   0.022  1.0000
##   0.07312489 0.07945248 Inf   0.920  0.9842
##   0.14390067 0.07647775 Inf   1.882  0.5636
##   0.07077578 0.05433666 Inf   1.303  0.8983
## 
## P value adjustment: tukey method for comparing a family of 8 estimates
cld(Proc_core_marginal, alpha = 0.05, Letters = letters, adjust = "tukey")
##  Location        Genus                          emmean         SE  df
##  French Creek/NP Cetobacterium               0.0548698 0.01937053 Inf
##  French Creek/NP Plesiomonas                 0.1120045 0.03710784 Inf
##  Cranberry/NP    Clostridium_sensu_stricto_1 0.1129944 0.02988853 Inf
##  French Creek/NP Aeromonas                   0.1441428 0.04581108 Inf
##  Cranberry/NP    Aeromonas                   0.1480403 0.03718525 Inf
##  Cranberry/NP    Plesiomonas                 0.1827802 0.04350646 Inf
##  French Creek/NP Clostridium_sensu_stricto_1 0.2559051 0.06906070 Inf
##  Cranberry/NP    Cetobacterium               0.3452827 0.06271771 Inf
##   asymp.LCL asymp.UCL .group
##  0.00204625 0.1076934  a    
##  0.01081107 0.2131978  a    
##  0.03148808 0.1945006  a    
##  0.01921564 0.2690700  ab   
##  0.04663585 0.2494448  ab   
##  0.06413778 0.3014227  ab   
##  0.06757606 0.4442342  ab   
##  0.17425104 0.5163144   b   
## 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 8 estimates 
## P value adjustment: tukey method for comparing a family of 8 estimates 
## significance level used: alpha = 0.05

Creating Log2 Graphs (Environment vs. Fish)

CRAN

#Combine data from CH_1_CRAN_H2O_Log2 & CH_1_CRAN_NP_Log2
#Rename .csv as CH_1_Cran_Compare.csv

CH_1_Cran_Compare<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_CRAN_COMPARE.csv", header=TRUE, check.names = FALSE)

CH_1_Cran_log2fc<-log2(CH_1_Cran_Compare$Log2)

ggplot(CH_1_Cran_Compare, aes(Genus, CH_1_Cran_log2fc), size = 10, shape = 2) +  geom_point(shape=18, size = 10) + ylim(-5,16) + theme_classic()+ labs (y = "Log2-Fold Change", title =  "Cranberry Creek", subtitle = "Northern Pike vs. Water Samples") + theme(axis.text.x = element_text(angle=315, vjust = -0.25, hjust = .25))+ geom_hline(yintercept = 0) + geom_hline(yintercept = -1, linetype = "dashed") + geom_hline(yintercept = 1, linetype = "dashed")
## Warning: Removed 4 rows containing missing values (geom_point).

FC LOW

#Combine data from CH_1_FCLOW_H2O_Log2 & CH_1_FCLOW_NP_Log2
#Rename .csv as CH_1_FCLOW_Compare.csv

CH_1_FCLOW_Compare<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Core_Microbiome/CH_1_FCLOW_COMPARE.csv", header=TRUE, check.names = FALSE)

CH_1_FCLOW_log2fc<-log2(CH_1_FCLOW_Compare$Log2)
CH_1_FCLOW_Compare$Log2<-CH_1_FCLOW_log2fc

ggplot(CH_1_FCLOW_Compare, aes(Genus, CH_1_FCLOW_log2fc), size = 10, shape = 2) +  geom_point(shape=18, size = 10) + ylim(0,16) + theme_classic() +  labs (y = "Log2-Fold Change", title =  "Lower French Creek", subtitle = "Northern Pike vs. Water Samples") + theme(axis.text.x = element_text(angle=315, vjust = -0.25, hjust = .25))+ geom_hline(yintercept = -1, linetype = "dashed") + geom_hline(yintercept = 1, linetype = "dashed")
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_hline).

Tax4Fun Analysis - Predicting Microbial Functions based on Community Composition

Site vs. Site

Individual File: CH_1_Tax4Fun_Microbial_Function_Predictions.Rmd

Determine Microbe Functions with Tax4Fun

#Note the Mycoplasma dominated FC Low sample (NP 002) was removed as an outlier.

CH_1_Tax4Fun_ASVs<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_ASVs.csv", header = TRUE, check.names = FALSE, row.names = 1)

CH_1_Tax4Fun_ASVs_t<-as.data.frame(t(CH_1_Tax4Fun_ASVs))
CH_1_Tax4Fun_ASVs_m<-as.matrix(CH_1_Tax4Fun_ASVs_t)
CH_1_Tax4Fun_ASVs_list<-list(CH_1_Tax4Fun_ASVs_m, CH_1_taxa)
names(CH_1_Tax4Fun_ASVs_list)<-paste("name", 1:2, sep="")
tmp<-tempdir()
download_ref(tmp,reference='silva_ko',overwrite=FALSE)

CH_1_FUNCTIONS <- t4f(CH_1_Tax4Fun_ASVs_list$name1, rows_are_taxa=FALSE, tax_table = CH_1_Tax4Fun_ASVs_list$name2, reference_path = tmp, type = 'uproc', short=TRUE, cn_normalize = TRUE, sample_normalize =TRUE, drop=TRUE)

write.csv(CH_1_FUNCTIONS$fxn_table, "CH_1_FUNCTIONS_FXN_TABLE.csv")
write.csv(CH_1_FUNCTIONS$method_meta, "CH_1_FUNCTIONS_METHOD_META.csv")
write.csv(CH_1_FUNCTIONS$fxn_meta$KEGG_Description, "CH_1_FUNCTIONS_FXN_META_KEGG_DESCRIPTION.csv")
write.csv(CH_1_FUNCTIONS$fxn_meta$KEGG_Pathways, "CH_1_FUNCTIONS_FXN_META_KEGG_PATHWAYS.csv")

Function Bar Graph (Top 20 KO’s):

head(sort(colSums(CH_1_FUNCTIONS$fxn_table), decreasing = TRUE), n = 20)
##     K02035     K03406     K03737     K15125     K03046     K03043 
## 0.11398623 0.08763998 0.08725464 0.08086655 0.07455367 0.06786098 
##     K02015     K00656     K03657     K01955     K03701     K02337 
## 0.06765369 0.06492334 0.06482366 0.06298126 0.06164818 0.05645908 
##     K01952     K02033     K03763     K01870     K03070     K01873 
## 0.05636266 0.05616618 0.05383037 0.05261535 0.05107165 0.04906250 
##     K01872     K02032 
## 0.04889534 0.04860961
###Determine's Top 20 KO Hits - Fill out CH_1_Tax4Fun_Analysis_ForR.csv with average ASV Relative abundance values (+/- SE), remove KO's with unknown KEGG pathways, and combine simialr pathways based on level 1 KEGG description (include level 2 in parentheses). For metabolic KOs, group all as "Metabolic Pathways".


#Environmental Signal Processing

CH_1_Tax4Fun_ENV_Proc<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_Analysis_Env_Sig.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on '~/
## Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/
## CH_1_Tax4Fun_Analysis_Env_Sig.csv'
CH_1_Tax4Fun_Env_Sig_Graph<-ggplot(data = CH_1_Tax4Fun_ENV_Proc, aes(x = Pathways, y =AVG, fill = Location)) + geom_bar(stat="identity", color = "black", position = position_dodge()) + labs (title = "Tax4Fun Microbial Function Predictions", subtitle = "Environmental Information Processing (Signal Transduction)", y = "Relative Gene Abundance", x = "") + theme(axis.text.y = element_text(size =7)) + geom_errorbar(aes(ymin=AVG-SE, ymax=AVG+SE), width=.2, position=position_dodge(0.9)) + theme_classic() + theme(axis.ticks.x.bottom = element_blank()) + theme(axis.text.x = element_blank()) + scale_fill_manual(values = c("darkslategray2", "darkgoldenrod4"))

CH_1_Tax4Fun_Env_Sig_Graph

#Cellular Processes

CH_1_Tax4Fun_Cell_Proc<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_Cell_Proc.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on '~/
## Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/
## CH_1_Tax4Fun_Cell_Proc.csv'
CH_1_Tax4Fun_Cell_Sig_Graph<-ggplot(data = CH_1_Tax4Fun_Cell_Proc, aes(x = Pathways, y =AVG, fill = Location)) + geom_bar(stat="identity", color = "black", position = position_dodge()) + labs (title = "Tax4Fun Microbial Function Predictions", subtitle = "Cellular Processes (Motility)", y = "", x = "") + theme(axis.text.y = element_text(size =7)) + geom_errorbar(aes(ymin=AVG-SE, ymax=AVG+SE), width=.2, position=position_dodge(0.9)) + theme_classic() + theme(axis.ticks.x.bottom = element_blank()) + theme(axis.text.x = element_blank())+ scale_fill_manual(values = c("darkslategray2", "darkgoldenrod4"))

CH_1_Tax4Fun_Cell_Sig_Graph

#Metabolic Pathways

CH_1_Tax4Fun_Meta<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_Metabolic.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on '~/
## Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/
## CH_1_Tax4Fun_Metabolic.csv'
CH_1_Tax4Fun_Metabolic_Graph<-ggplot(data = CH_1_Tax4Fun_Meta, aes(x = Pathways, y =AVG, fill = Location)) + geom_bar(stat="identity", color = "black", position = position_dodge()) + labs (title = "Tax4Fun Microbial Function Predictions", subtitle = "Metabolic Pathways", y = "Relative Gene Abundance", x = "") + theme(axis.text.y = element_text(size =7)) + geom_errorbar(aes(ymin=AVG-SE, ymax=AVG+SE), width=.2, position=position_dodge(0.9)) + theme_classic() + theme(axis.ticks.x.bottom = element_blank()) + theme(axis.text.x = element_blank())+ scale_fill_manual(values = c("darkslategray2", "darkgoldenrod4"))

CH_1_Tax4Fun_Metabolic_Graph

#Genetic Information Processing

CH_1_Tax4Fun_Gen<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_Genetic_Proc.csv", header = TRUE, check.names = FALSE)
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on '~/
## Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/
## CH_1_Tax4Fun_Genetic_Proc.csv'
CH_1_Tax4Fun_Gen_Graph<-ggplot(data = CH_1_Tax4Fun_Gen, aes(x = Pathways, y =AVG, fill = Location)) + geom_bar(stat="identity", color = "black", position = position_dodge()) + labs (title = "Tax4Fun Microbial Function Predictions", subtitle = "Genetic Information Processing (Replication/Repair or Transcription", y = "", x = "") + theme(axis.text.y = element_text(size =7)) + geom_errorbar(aes(ymin=AVG-SE, ymax=AVG+SE), width=.2, position=position_dodge(0.9)) + theme_classic() + theme(axis.ticks.x.bottom = element_blank()) + theme(axis.text.x = element_blank())+ scale_fill_manual(values = c("darkslategray2", "darkgoldenrod4"))

CH_1_Tax4Fun_Gen_Graph

#ALL
ggarrange(CH_1_Tax4Fun_Env_Sig_Graph, CH_1_Tax4Fun_Cell_Sig_Graph, CH_1_Tax4Fun_Metabolic_Graph, CH_1_Tax4Fun_Gen_Graph, ncol=2, nrow=2, common.legend = TRUE, legend = c("right"))

Significance per KO between Locations

#Export Data for Top KO's that map to a specified pathway
#File name = CH_2_KO_Env_Processing.csv

CH_1_Tax4Fun_Significance<-read.csv(file = "~/Dropbox/bdgallo_MS_SUNYESF/Ch1_3_Data_Files_Analysis/CH_1_Data/Tax4Fun/CH_1_Tax4Fun_significance.csv")

CH_1_T4F_CranNP<-subset(CH_1_Tax4Fun_Significance, Location == "Cranberry NP")
CH_1_T4F_FCLOW_NP<-subset(CH_1_Tax4Fun_Significance, Location == "French Creek NP")

#Environmental Information Processing (Signal Transduction)
#Welch's Two Sample t-test (Location vs. Pathway)

t_test_Env<-t.test(CH_1_T4F_CranNP$Environmental, CH_1_T4F_FCLOW_NP$Environmental, var.equal = FALSE, paired=FALSE)

t_test_Env
## 
##  Welch Two Sample t-test
## 
## data:  CH_1_T4F_CranNP$Environmental and CH_1_T4F_FCLOW_NP$Environmental
## t = -4.3783, df = 21.561, p-value = 0.0002493
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.004572976 -0.001630908
## sample estimates:
##   mean of x   mean of y 
## 0.004616423 0.007718365
#Cellular Processes (Motility)
#Welch's Two Sample t-test (Location vs. Pathway)

t_test_Cell<-t.test(CH_1_T4F_CranNP$Cellular, CH_1_T4F_FCLOW_NP$Cellular, var.equal = FALSE, paired=FALSE)

t_test_Cell
## 
##  Welch Two Sample t-test
## 
## data:  CH_1_T4F_CranNP$Cellular and CH_1_T4F_FCLOW_NP$Cellular
## t = -4.3783, df = 21.561, p-value = 0.0002493
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.004572976 -0.001630908
## sample estimates:
##   mean of x   mean of y 
## 0.004616423 0.007718365
#Metabolic Pathways
#Welch's Two Sample t-test (Location vs. Pathway)

t_test_Metab<-t.test(CH_1_T4F_CranNP$Metabolic, CH_1_T4F_FCLOW_NP$Metabolic, var.equal = FALSE, paired=FALSE)

t_test_Metab
## 
##  Welch Two Sample t-test
## 
## data:  CH_1_T4F_CranNP$Metabolic and CH_1_T4F_FCLOW_NP$Metabolic
## t = 0.89809, df = 10.545, p-value = 0.3892
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.006729713  0.015925050
## sample estimates:
##  mean of x  mean of y 
## 0.02432483 0.01972717
#Genetic Information Processing (Repair/Replication or Transcription)
#Welch's Two Sample t-test (Location vs. Pathway)

t_test_Gen<-t.test(CH_1_T4F_CranNP$Genetic, CH_1_T4F_FCLOW_NP$Genetic, var.equal = FALSE, paired=FALSE)

t_test_Gen
## 
##  Welch Two Sample t-test
## 
## data:  CH_1_T4F_CranNP$Genetic and CH_1_T4F_FCLOW_NP$Genetic
## t = -0.14495, df = 8.6172, p-value = 0.8881
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01332309  0.01172871
## sample estimates:
##  mean of x  mean of y 
## 0.01505936 0.01585655